#######################################################################
#Name of code file: narrative_analysis.R

#Data In: repost_data.csv

#Data Out: Table 1 and Table A3
######################################################################

#Load packages
library(plyr)
library(ggplot2)
library(readr)
library(xtable)
library(stringr)
library(tidyr)

#Point R at the replication folder; the data path below is relative to it
setwd("~/Dropbox/diresta_grossman_siegel_replication/")

#Load the repost-level data
data <- read_csv("data/repost_data.csv")

#Constant column of ones, summed later to count rows per post
data[["one"]] <- 1

#Collapse to one row per post_id: count the reposts and total the share,
#follower and interaction columns
#NOTE(review): public_shares is summed without dropping NAs, unlike the two
#totals below it -- confirm that this is intentional
data_grouped <- ddply(
  data, .(post_id), summarise,
  number_reposts = sum(one),
  public_shares = sum(times_posted_to_fb_pages_groups),
  total_followers = sum(total_followers_of_pages_groups_fb, na.rm = TRUE),
  total_interactions_public_private = sum(total_interactions_public_private,
                                          na.rm = TRUE)
)

#Table 1
#Rank posts from most to least reposted and keep those reposted more than
#14 times
data_grouped <- data_grouped[order(-data_grouped$number_reposts), ]
data_grouped_subset <- subset(data_grouped, number_reposts > 14)
#Print explicitly so the table is also emitted when the script is run via
#source(), where top-level values are not auto-printed
print(xtable(data_grouped_subset))

#Extract domains from full URLs
#repost_url is treated as a comma-separated list of URLs wrapped in square
#brackets. Split on every comma (rather than into a fixed four columns) so
#that rows listing more than four URLs do not silently lose the extras.
url_pieces <- unlist(strsplit(as.character(data$repost_url), ",", fixed = TRUE))

#Strip the square brackets left over from the list formatting
url_pieces <- gsub("[][]", "", url_pieces)

#Drop missing and empty entries; the literal text "NA" is dropped as well,
#matching what the earlier version of this step filtered out
url_pieces <- url_pieces[!is.na(url_pieces) & !(url_pieces %in% c("", "NA"))]

#Split each URL on "/" so that the third piece (after "http:" and the empty
#gap between the two slashes) is the domain. Pieces beyond the third are
#dropped and URLs with fewer than three pieces get NA, without warnings.
urls <- data.frame(url = url_pieces, stringsAsFactors = FALSE)
urls <- separate(urls, url, into = c("http", "gap", "domain"), sep = "/",
                 remove = FALSE, extra = "drop", fill = "right")

#Count how many times each domain appears
domains <- as.data.frame(table(urls$domain))

#Make Appendix Table 3
#Keep the domains that appear more than nine times, most frequent first
top_domains_more_than_nine_times <- subset(domains, Freq > 9)
freq_rank <- order(-top_domains_more_than_nine_times$Freq)
top_domains_more_than_nine_times <- top_domains_more_than_nine_times[freq_rank, ]

domain_table <- xtable(top_domains_more_than_nine_times)
print(domain_table)